# Regional Centers: Regression Models 
# Date: 5/2/2023
# POC: Solomon Major, Jack Scalia, and Tyler Liboro

libs <- c('readxl', 'ggplot2', 'caret', 'dplyr', 'zoo', 
          'tidyverse', 'plm', 'texreg', 'webshot', 'corrplot', 'data.table')
lapply(libs, require, character.only = T)
`%>%` <- magrittr::`%>%` # Use pipe ----

# -------------------------------------------------------------------------------------------
#----------------------PRE PROCESSING -------------------------------------------------------
# -------------------------------------------------------------------------------------------


# Read in data
f_path <- paste0(getwd(), '/Surge Support - Regional Centers/data/rc dataset v8.0.xlsx') 
data <- readxl::read_excel(path = f_path, sheet = 'dataset') %>% as.data.table


#Window dataset to only be greater than or equal to 2009: RCPAMS may not go that far
df <- df %>% 
  .[year >= 2009] %>% #Consider data  after 2009
  .[southcom == 1,] %>% #Only look at south american countries
  .[, year := as.integer(year)] %>% #pre processing
  .[, nominal_wgi_pc := as.integer(nominal_wgi_pc)] %>% #pre processing
  
  .[, cday_total_1 := cday_total + .1] %>% #Transforming  Target Varibales
  .[, ln_cday_total := log(cday_total_1)] %>% #log response variable
  
  .[, cday_312 := cday_312 + .1] %>% #Transforming  Target Varibales
  .[, ln_cday_312 := log(cday_312)] %>% #log response variable
  
  .[, cday_312_mil := cday_312_mil + .1] %>% #Transforming  Target Varibales
  .[, ln_cday_312_mil := log(cday_312_mil)] #log response variable


### Time Variant Variables: Mean -----------------------------------------------
#Create new column that holds the mean per group (un_num)
df <- df %>% 
  
  .[, mean_nominal_wgi_pc := mean(nominal_wgi_pc, na.rm = T), by = un_num] %>%
  .[, mean_real_wgi_pc := mean(real_wgi_pc, na.rm = T), by = un_num] %>%
  .[, mean_freehse_pr := mean(freehse_pr, na.rm = T), by = un_num] %>%
  .[, mean_freehse_cl := mean(freehse_cl, na.rm = T), by = un_num] %>%
  
  .[, mean_exports_world := mean(exports_world, na.rm = T), by = un_num] %>%
  .[, mean_imports_world := mean(imports_world, na.rm = T), by = un_num] %>%
  .[, mean_exports_to_us := mean(exports_to_us, na.rm = T), by = un_num] %>%
  .[, mean_imports_from_us := mean(imports_from_us, na.rm = T), by = un_num] %>%
  
  .[, mean_wgi_voice := mean(wgi_voice, na.rm = T), by = un_num] %>%
  .[, mean_wgi_stability := mean(wgi_stability, na.rm = T), by = un_num] %>%
  .[, mean_wgi_govteffective := mean(wgi_govteffective, na.rm = T), by = un_num] %>%
  .[, mean_wgi_regulation := mean(wgi_regulation, na.rm = T), by = un_num] %>%
  .[, mean_wgi_ruleoflaw := mean(wgi_ruleoflaw, na.rm = T), by = un_num] %>%
  .[, mean_wgi_coc := mean(wgi_coc, na.rm = T), by = un_num] %>%
  
  .[, mean_usa_oda_net := mean(usa_oda_net, na.rm = T), by = un_num] %>%
  .[, mean_world_oda_net := mean(world_oda_net, na.rm = T), by = un_num] %>%
  
  .[, mean_pko_total := mean(pko_total, na.rm = T), by = un_num] %>%
  .[, mean_pko_troops := mean(pko_troops, na.rm = T), by = un_num] %>%
  .[, mean_pko_police := mean(pko_police, na.rm = T), by = un_num] %>%
  .[, mean_pko_expobs := mean(pko_expobs, na.rm = T), by = un_num] %>%
  
  .[, mean_dsca_total_agreements := mean(dsca_total_agreements, na.rm = T), by = un_num] %>%
  .[, mean_year := mean(year, na.rm = T), by = un_num] %>%
  
  .[, mean_pct_mil_pers := mean(pct_mil_pers, na.rm = T), by = un_num] %>%
  .[, mean_unga_diff := mean(unga_diff, na.rm = T), by = un_num] %>%
  .[, mean_fsi_total := mean(fsi_total, na.rm = T), by = un_num] %>%
  .[, mean_mil_aid := mean(mil_aid, na.rm = T), by = un_num] %>%
  .[, mean_312_eligible_h := mean(`312_eligible_h`, na.rm = T), by = un_num] %>%
  
  .[, mean_hdi_score := mean(hdi_score, na.rm = T), by = un_num] %>%
  .[, mean_1m_real_gdp_pc := mean(real_gdp_pc/1000000, na.rm = T), by = un_num] %>%
  .[, mean_1m_trade_usa := mean((exports_to_us + imports_from_us)/1000000, na.rm = T), by = un_num] %>%
  .[, mean_1m_pop := mean(pop/1000000, na.rm = T), by = un_num] 


### Time Variant Variables: Demean --------------------------------------------
#Create new column that has time variant observation demeaned: (x - xmean)
df <- df %>% 
  
  .[, demean_nominal_wgi_pc := nominal_wgi_pc - mean_nominal_wgi_pc, by = un_num] %>%
  .[, demean_real_wgi_pc := real_wgi_pc - mean_real_wgi_pc, by = un_num] %>%
  .[, demean_freehse_pr := freehse_pr - mean_freehse_pr, by = un_num] %>%
  .[, demean_freehse_cl := freehse_cl - mean_freehse_cl, by = un_num] %>%
  
  .[, demean_exports_world := exports_world - mean_exports_world, by = un_num] %>%
  .[, demean_imports_world := imports_world - mean_imports_world, by = un_num] %>%
  .[, demean_exports_to_us := exports_to_us - mean_exports_to_us, by = un_num] %>%
  .[, demean_imports_from_us := imports_from_us - mean_imports_from_us, by = un_num] %>%
  
  .[, demean_wgi_voice := wgi_voice - mean_wgi_voice, by = un_num] %>%
  .[, demean_wgi_stability := wgi_stability - mean_wgi_stability, by = un_num] %>%
  .[, demean_wgi_govteffective := wgi_govteffective - mean_wgi_govteffective, by = un_num] %>%
  .[, demean_wgi_regulation := wgi_regulation - mean_wgi_regulation, by = un_num] %>%
  .[, demean_wgi_ruleoflaw := wgi_ruleoflaw - mean_wgi_ruleoflaw, by = un_num] %>%
  .[, demean_wgi_coc := wgi_coc - mean_wgi_coc, by = un_num] %>%
  
  .[, demean_usa_oda_net := usa_oda_net - mean_usa_oda_net, by = un_num] %>%
  .[, demean_world_oda_net := world_oda_net - mean_world_oda_net, by = un_num] %>%
  
  .[, demean_pko_total := pko_total - mean_pko_total, by = un_num] %>%
  .[, demean_pko_troops := pko_troops - mean_pko_troops, by = un_num] %>%
  .[, demean_pko_police := pko_police - mean_pko_police, by = un_num] %>%
  .[, demean_pko_expobs := pko_expobs - mean_pko_expobs, by = un_num] %>%
  
  .[, demean_dsca_total_agreements := dsca_total_agreements - mean_dsca_total_agreements, by = un_num] %>%
  .[, demean_year := year - mean_year, by = un_num] %>%
  
  .[, demean_pct_mil_pers := pct_mil_pers - mean_pct_mil_pers, by = un_num] %>%
  .[, demean_unga_diff := unga_diff - mean_unga_diff, by = un_num] %>%
  .[, demean_fsi_total := fsi_total - mean_fsi_total , by = un_num] %>%
  .[, demean_mil_aid := mil_aid - mean_mil_aid, by = un_num] %>%
  .[, demean_312_eligible_h :=  `312_eligible_h` - mean_312_eligible_h, by = un_num] %>%
  
  .[, demean_hdi_score := hdi_score - mean_hdi_score, by = un_num] %>%
  
  #Need to transform first number since meaned_variable was transformed (ex.divided by 1000)
  .[, demean_1m_real_gdp_pc := real_gdp_pc/1000000 - mean_1m_real_gdp_pc, by = un_num] %>%
  .[, demean_1m_trade_usa := (exports_to_us + imports_from_us)/1000000 - mean_1m_trade_usa, by = un_num] %>%
  .[, demean_1m_pop := pop/1000000 - mean_1m_pop, by = un_num] 


# -------------------------------------------------------------------------------------------
#----------------------DATA VISUALIZATION ---------------------------------------------------
# -------------------------------------------------------------------------------------------

# (1) Check for Multicollinearity --------------------------------------
#Remove any non-numerical variables to test for Multi-collinearity
# remove <- c('v_look', 'country_name', 'iso_alpha3', 'gcc', 'wb_income_grp',
#             'southcom', 'africom', 'eucom', 'centcom', 'indopacom', 'northcom')
# df <- df %>% select(-remove) #corr_vars
# corrplot(cor(corr_vars, use="pairwise.complete.obs"), tl.cex = .7,
#        assign.col = "min2max", cl.ratio = .01) #nas present -> pairwise


# (2) Barchart over time per country ---------------------------
#Fill based on 312 eligibility. Barchart over time
# ggplot(data = df, aes(x = year, y = cday_physical_total, 
#   fill = `312_eligible_h`)) +
#   geom_col(position = 'dodge') + 
#   facet_wrap('iso_alpha3', scales = "free")

# (3) Create histogram of dependent variable ------------------
# hist(df$cday_total)
# hist(log(df$cday_total)) #better







# -------------------------------------------------------------------------------------------
#----------------------MODELING--------------------------------------------------------------
# -------------------------------------------------------------------------------------------

#Regional Centers Modeling  ----------------------------------------
# Model 1 Formula: -------------------------------------------------
mod_1 <-
  stats::formula(

    #Target Variable ----------------------------------------------
    ln_cday_312
    
    #Time Variant Observations (demeaned)
    ~  demean_312_eligible_h 
    + mean_312_eligible_h
    
    + demean_hdi_score
    + mean_hdi_score
    
    + demean_unga_diff
    + mean_unga_diff
    
    + demean_1m_real_gdp_pc
    + mean_1m_real_gdp_pc
        
    + demean_1m_trade_usa
    + mean_1m_trade_usa
    
    + demean_1m_pop
    + mean_1m_pop    
    
    
    # + demean_pct_mil_pers
    # + demean_covid_flag
    # + demean_year
    # + mean_pct_mil_pers
    # + mean_covid_flag
    # + mean_year
    
    
    #Interaction(s): 

        
    #Time Invariant Observations
    # + ln_mindist_usa
    
    #Random Effects ----------------------------------------------------
    + (1 | un_num)
    + (0 + demean_312_eligible_h | un_num)
    + (0 + demean_unga_diff | un_num)
    + (0 + demean_hdi_score | un_num)
    + (0 + demean_1m_real_gdp_pc | un_num)
    + (0 + demean_1m_trade_usa | un_num)
    + (0 + demean_1m_pop | un_num)

  )

# Fit Linear Mixed Effects Model
mod_1 <- lme4::lmer(
  mod_1,
  data = df,
  control = lme4::lmerControl(), #Originially Nelder_Mead, but accepting default
  REML = FALSE
)

summary(mod_1)








# Table for all
# EDIT: Map variables to names you would like in regression output
# variable_mapping <- list('post_stayTRUE'='SHELTER IN PLACE', 'mean_post_stay'='MEAN SHELTER IN PLACE', 'log_pop_dens'='ln(POP DENSITY)', 'PovertyAllAgesPct'='% POVERTY', 'Age65AndOlderPct2010.x'='% POP OVER 65', 'WhiteNonHispanicPct2010'='% WHITE', 'centered_cum_cases'='CUM. CASES (SPLINE 1)', 'peak_to_fall_cases'='CUM. CASES (SPLINE 2)', 'fall_to_last_cases'='CUM. CASES (SPLINE 3)', 'stay_days'='DAYS SINCE SHELTER ORDER', 'mean_stay_days'='MEAN DAYS SINCE ORDER', 'RuralUrbanContinuumCode2013'='RURAL/URBAN CODE', 'UrbanInfluenceCode2013'='URBAN INFLUENCE', 'Metro2013'='METRO AREA', 'Metro_Adjacent2013'='METRO ADJACENT', 'centered_stay_days:bin_pop0'='SHELTER*DENSITY', 'centered_stay_days:bin_pov0'='SHELTER*POVERTY', '(Intercept)'='Constant')
models <- list(extract(mod_1, include.aic=FALSE, include.bic=FALSE, include.loglik=FALSE, include.deviance=FALSE))

# EDIT: Table name
table_name = 'table_9_11'
# EDIT: Title of the table
title <- 'Regional Centers: Perry Centers: Dependent Variable: ln_cday_312'
# EDIT: Table notes for additional information you would like to include about models or output
table_notes = 'Standard errors are in (parentheses): %stars.'
# EDIT: Additional rows to add to the table ( add to list using the following format: 'Additional row'=rep('value', length(models)) )
# row_info <- list('Groups (counties)'=rep(1438, length(models)))
# EDIT: Digits to include following decimal point
n_digits = 3
# EDIT: Coefficients and std. errors on a single row
single_row = TRUE
# DON'T EDIT: Commands to generate table
filename <- paste("C:/Users/600770/Documents/",table_name, "_", gsub(":", "_", format(Sys.time(), format = "%T")), ".html", sep='')
htmlreg(models,
        # custom.coef.map = variable_mapping,
        # custom.gof.rows=row_info,
        digits=n_digits,
        custom.note = table_notes,
        custom.model.names = c("Regional Center Models"),
        caption=title,
        caption.above=TRUE,
        single.row = single_row,
        file = filename,
        inline.css = TRUE,
        doctype = TRUE,
        html.tag = TRUE,
        head.tag = TRUE,
        body.tag = TRUE)
webshot(filename, paste("C:/Users/600770/Documents/",table_name,".pdf", sep=''))






library(MuMIn) #https://cran.r-project.org/web/packages/MuMIn/MuMIn.pdf
r.squaredGLMM(mod_1)



output <- summary(mod_1)$coefficients
output2 <- as.data.table(x = output, keep.rownames = TRUE)
output2

# library(openxlsx)
# write.xlsx(x = output2, file = paste0(dirname(f_path), '/output.xlsx'), sheetName = "output", append = TRUE)

